{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "1. 数据读取"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>transID</th>\n",
       "      <th>cumid</th>\n",
       "      <th>time</th>\n",
       "      <th>amount</th>\n",
       "      <th>type_label</th>\n",
       "      <th>type</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9407</td>\n",
       "      <td>10001</td>\n",
       "      <td>14JUN09:17:58:34</td>\n",
       "      <td>199.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9625</td>\n",
       "      <td>10001</td>\n",
       "      <td>16JUN09:15:09:13</td>\n",
       "      <td>369.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11837</td>\n",
       "      <td>10001</td>\n",
       "      <td>01JUL09:14:50:36</td>\n",
       "      <td>369.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>26629</td>\n",
       "      <td>10001</td>\n",
       "      <td>14DEC09:18:05:32</td>\n",
       "      <td>359.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>30850</td>\n",
       "      <td>10001</td>\n",
       "      <td>12APR10:13:02:20</td>\n",
       "      <td>399.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   transID  cumid              time  amount type_label    type\n",
       "0     9407  10001  14JUN09:17:58:34   199.0         正常  Normal\n",
       "1     9625  10001  16JUN09:15:09:13   369.0         正常  Normal\n",
       "2    11837  10001  01JUL09:14:50:36   369.0         正常  Normal\n",
       "3    26629  10001  14DEC09:18:05:32   359.0         正常  Normal\n",
       "4    30850  10001  12APR10:13:02:20   399.0         正常  Normal"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "data = pd.read_csv(r'./RFM_TRAD_FLOW.csv', encoding='gbk')  \n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "2. 数据探索"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 26662 entries, 0 to 26661\n",
      "Data columns (total 6 columns):\n",
      " #   Column      Non-Null Count  Dtype  \n",
      "---  ------      --------------  -----  \n",
      " 0   transID     26662 non-null  int64  \n",
      " 1   cumid       26662 non-null  int64  \n",
      " 2   time        26662 non-null  object \n",
      " 3   amount      26662 non-null  float64\n",
      " 4   type_label  26662 non-null  object \n",
      " 5   type        26662 non-null  object \n",
      "dtypes: float64(1), int64(2), object(3)\n",
      "memory usage: 1.2+ MB\n"
     ]
    }
   ],
   "source": [
    "# 查看数据的基本信息\n",
    "data.info()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "transID       0.0\n",
       "cumid         0.0\n",
       "time          0.0\n",
       "amount        0.0\n",
       "type_label    0.0\n",
       "type          0.0\n",
       "dtype: float64"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#查看数据缺失值\n",
    "data.isnull().sum()   #缺失数量 \n",
    "data.isnull().mean()   #缺失比例"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0        14JUN09:17:58:34\n",
       "1        16JUN09:15:09:13\n",
       "2        01JUL09:14:50:36\n",
       "3        14DEC09:18:05:32\n",
       "4        12APR10:13:02:20\n",
       "               ...       \n",
       "26657    06JUN10:16:14:31\n",
       "26658    16JUN10:16:18:40\n",
       "26659    26JUN10:21:01:55\n",
       "26660    19JUL09:13:51:45\n",
       "26661    22JUN10:15:42:43\n",
       "Name: time, Length: 26662, dtype: object"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.time"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "3. 特征工程"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import time "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "#封装成一个函数，对整理数据进行格式化\n",
    "def to_time(t):\n",
    "    out_t=pd.to_datetime(time.mktime(time.strptime(t,'%d%b%y:%H:%M:%S')),unit='s') \n",
    "    return out_t"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>transID</th>\n",
       "      <th>cumid</th>\n",
       "      <th>time</th>\n",
       "      <th>amount</th>\n",
       "      <th>type_label</th>\n",
       "      <th>type</th>\n",
       "      <th>time_new</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>9407</td>\n",
       "      <td>10001</td>\n",
       "      <td>14JUN09:17:58:34</td>\n",
       "      <td>199.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "      <td>2009-06-14 09:58:34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>9625</td>\n",
       "      <td>10001</td>\n",
       "      <td>16JUN09:15:09:13</td>\n",
       "      <td>369.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "      <td>2009-06-16 07:09:13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>11837</td>\n",
       "      <td>10001</td>\n",
       "      <td>01JUL09:14:50:36</td>\n",
       "      <td>369.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "      <td>2009-07-01 06:50:36</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>26629</td>\n",
       "      <td>10001</td>\n",
       "      <td>14DEC09:18:05:32</td>\n",
       "      <td>359.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "      <td>2009-12-14 10:05:32</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>30850</td>\n",
       "      <td>10001</td>\n",
       "      <td>12APR10:13:02:20</td>\n",
       "      <td>399.0</td>\n",
       "      <td>正常</td>\n",
       "      <td>Normal</td>\n",
       "      <td>2010-04-12 05:02:20</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   transID  cumid              time  amount type_label    type  \\\n",
       "0     9407  10001  14JUN09:17:58:34   199.0         正常  Normal   \n",
       "1     9625  10001  16JUN09:15:09:13   369.0         正常  Normal   \n",
       "2    11837  10001  01JUL09:14:50:36   369.0         正常  Normal   \n",
       "3    26629  10001  14DEC09:18:05:32   359.0         正常  Normal   \n",
       "4    30850  10001  12APR10:13:02:20   399.0         正常  Normal   \n",
       "\n",
       "             time_new  \n",
       "0 2009-06-14 09:58:34  \n",
       "1 2009-06-16 07:09:13  \n",
       "2 2009-07-01 06:50:36  \n",
       "3 2009-12-14 10:05:32  \n",
       "4 2010-04-12 05:02:20  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#调用函数\n",
    "data[\"time_new\"]= data.time.apply(to_time)\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "4. RFM模型计算"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "R计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Timestamp('2010-09-25 13:17:30')"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 最近一次消费时间\n",
    "data.time_new.max()  #定义2010-09-26为取数时间"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>2010-09-17 04:47:43</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>2010-07-03 03:43:16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>2010-08-28 08:04:25</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>2010-08-29 04:44:52</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>2010-08-18 10:25:36</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                     time\n",
       "cumid                    \n",
       "10001 2010-09-17 04:47:43\n",
       "10002 2010-07-03 03:43:16\n",
       "10003 2010-08-28 08:04:25\n",
       "10004 2010-08-29 04:44:52\n",
       "10005 2010-08-18 10:25:36"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "R_trans = pd.DataFrame(data.groupby(['cumid'])['time_new'].max())\n",
    "R_trans.columns=['time']\n",
    "R_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>time</th>\n",
       "      <th>R</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>2010-09-17 04:47:43</td>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>2010-07-03 03:43:16</td>\n",
       "      <td>84</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>2010-08-28 08:04:25</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>2010-08-29 04:44:52</td>\n",
       "      <td>27</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>2010-08-18 10:25:36</td>\n",
       "      <td>38</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>2010-09-09 12:39:43</td>\n",
       "      <td>16</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>2010-09-10 04:20:46</td>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>2010-09-12 11:40:26</td>\n",
       "      <td>13</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>2010-09-23 13:36:13</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>2010-07-19 13:37:27</td>\n",
       "      <td>67</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 2 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                     time   R\n",
       "cumid                        \n",
       "10001 2010-09-17 04:47:43   8\n",
       "10002 2010-07-03 03:43:16  84\n",
       "10003 2010-08-28 08:04:25  28\n",
       "10004 2010-08-29 04:44:52  27\n",
       "10005 2010-08-18 10:25:36  38\n",
       "...                   ...  ..\n",
       "40296 2010-09-09 12:39:43  16\n",
       "40297 2010-09-10 04:20:46  15\n",
       "40298 2010-09-12 11:40:26  13\n",
       "40299 2010-09-23 13:36:13   1\n",
       "40300 2010-07-19 13:37:27  67\n",
       "\n",
       "[1200 rows x 2 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "R_trans['R'] = (data.time_new.max() - R_trans.time).dt.days\n",
    "R_trans"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "F计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>transID</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th>type</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">10001</th>\n",
       "      <th>Normal</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Special_offer</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>returned_goods</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">10002</th>\n",
       "      <th>Normal</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>returned_goods</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">10003</th>\n",
       "      <th>Normal</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Special_offer</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      transID\n",
       "cumid type                   \n",
       "10001 Normal               15\n",
       "      Presented             8\n",
       "      Special_offer         2\n",
       "      returned_goods        2\n",
       "10002 Normal               12\n",
       "      Presented             5\n",
       "      returned_goods        1\n",
       "10003 Normal               15\n",
       "      Presented             8\n",
       "      Special_offer         1"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 消费频率\n",
    "F = data.groupby(['cumid', 'type'])[['transID']].count()\n",
    "F.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">transID</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      transID                                       \n",
       "type   Normal Presented Special_offer returned_goods\n",
       "cumid                                               \n",
       "10001    15.0       8.0           2.0            2.0\n",
       "10002    12.0       5.0           NaN            1.0\n",
       "10003    15.0       8.0           1.0            1.0\n",
       "10004    15.0      12.0           2.0            1.0\n",
       "10005     8.0       5.0           NaN            1.0"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取消堆叠\n",
    "F_trans = F.unstack()\n",
    "F_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "MultiIndex([('transID',         'Normal'),\n",
       "            ('transID',      'Presented'),\n",
       "            ('transID',  'Special_offer'),\n",
       "            ('transID', 'returned_goods')],\n",
       "           names=[None, 'type'])"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods\n",
       "cumid                                                  \n",
       "10001    15.0        8.0            2.0             2.0\n",
       "10002    12.0        5.0            NaN             1.0\n",
       "10003    15.0        8.0            1.0             1.0\n",
       "10004    15.0       12.0            2.0             1.0\n",
       "10005     8.0        5.0            NaN             1.0"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans.columns = F_trans.columns.droplevel(0)\n",
    "F_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "type\n",
       "Normal              0\n",
       "Presented           0\n",
       "Special_offer     273\n",
       "returned_goods    504\n",
       "dtype: int64"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans.isnull().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10006</th>\n",
       "      <td>10.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10007</th>\n",
       "      <td>15.0</td>\n",
       "      <td>9.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10008</th>\n",
       "      <td>17.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10009</th>\n",
       "      <td>13.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10010</th>\n",
       "      <td>8.0</td>\n",
       "      <td>11.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods\n",
       "cumid                                                  \n",
       "10001    15.0        8.0            2.0             2.0\n",
       "10002    12.0        5.0            0.0             1.0\n",
       "10003    15.0        8.0            1.0             1.0\n",
       "10004    15.0       12.0            2.0             1.0\n",
       "10005     8.0        5.0            0.0             1.0\n",
       "10006    10.0        3.0            1.0             3.0\n",
       "10007    15.0        9.0            1.0             1.0\n",
       "10008    17.0       12.0            3.0             3.0\n",
       "10009    13.0        5.0            0.0             1.0\n",
       "10010     8.0       11.0            3.0             NaN"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans['Special_offer'] = F_trans['Special_offer'].fillna(0)\n",
    "F_trans.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#填补缺失，因为退回、赠送商品不计入客户价值，不填充"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.117647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.062500</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.117647</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods         F\n",
       "cumid                                                            \n",
       "10001    15.0        8.0            2.0             2.0  0.117647\n",
       "10002    12.0        5.0            0.0             1.0  0.000000\n",
       "10003    15.0        8.0            1.0             1.0  0.062500\n",
       "10004    15.0       12.0            2.0             1.0  0.117647\n",
       "10005     8.0        5.0            0.0             1.0  0.000000"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#特价商品用0填充\n",
    "F_trans[\"F\"]=F_trans['Special_offer']/(F_trans['Special_offer']+F_trans['Normal'])\n",
    "F_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "type\n",
       "Normal              0\n",
       "Presented           0\n",
       "Special_offer       0\n",
       "returned_goods    504\n",
       "F                   0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans.isnull().sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "M计算"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th>amount</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th>type</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th rowspan=\"4\" valign=\"top\">10001</th>\n",
       "      <th>Normal</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Special_offer</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>returned_goods</th>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">10002</th>\n",
       "      <th>Normal</th>\n",
       "      <td>12</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>returned_goods</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th rowspan=\"3\" valign=\"top\">10003</th>\n",
       "      <th>Normal</th>\n",
       "      <td>15</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Presented</th>\n",
       "      <td>8</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Special_offer</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                      amount\n",
       "cumid type                  \n",
       "10001 Normal              15\n",
       "      Presented            8\n",
       "      Special_offer        2\n",
       "      returned_goods       2\n",
       "10002 Normal              12\n",
       "      Presented            5\n",
       "      returned_goods       1\n",
       "10003 Normal              15\n",
       "      Presented            8\n",
       "      Special_offer        1"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 消费金额\n",
    "M = data.groupby(['cumid', 'type'])[['amount']].count()\n",
    "M.head(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr:last-of-type th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">amount</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      amount                                       \n",
       "type  Normal Presented Special_offer returned_goods\n",
       "cumid                                              \n",
       "10001   15.0       8.0           2.0            2.0\n",
       "10002   12.0       5.0           NaN            1.0\n",
       "10003   15.0       8.0           1.0            1.0\n",
       "10004   15.0      12.0           2.0            1.0\n",
       "10005    8.0       5.0           NaN            1.0"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 取消堆叠\n",
    "M_trans = M.unstack()\n",
    "M_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods\n",
       "cumid                                                  \n",
       "10001    15.0        8.0            2.0             2.0\n",
       "10002    12.0        5.0            NaN             1.0\n",
       "10003    15.0        8.0            1.0             1.0\n",
       "10004    15.0       12.0            2.0             1.0\n",
       "10005     8.0        5.0            NaN             1.0"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "M_trans.columns = M_trans.columns.droplevel(0)\n",
    "M_trans.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods\n",
       "cumid                                                  \n",
       "10001    15.0        8.0            2.0             2.0\n",
       "10002    12.0        5.0            0.0             1.0\n",
       "10003    15.0        8.0            1.0             1.0\n",
       "10004    15.0       12.0            2.0             1.0\n",
       "10005     8.0        5.0            0.0             1.0"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 填补缺失值\n",
    "M_trans['Special_offer'] = M_trans['Special_offer'].fillna(0)\n",
    "M_trans['returned_goods'] = M_trans['returned_goods'].fillna(0)\n",
    "M_trans.head()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th>type</th>\n",
       "      <th>Normal</th>\n",
       "      <th>Presented</th>\n",
       "      <th>Special_offer</th>\n",
       "      <th>returned_goods</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>12.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>15.0</td>\n",
       "      <td>8.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>15.0</td>\n",
       "      <td>12.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>8.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "type   Normal  Presented  Special_offer  returned_goods     M\n",
       "cumid                                                        \n",
       "10001    15.0        8.0            2.0             2.0  19.0\n",
       "10002    12.0        5.0            0.0             1.0  13.0\n",
       "10003    15.0        8.0            1.0             1.0  17.0\n",
       "10004    15.0       12.0            2.0             1.0  18.0\n",
       "10005     8.0        5.0            0.0             1.0   9.0"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "M_trans[\"M\"]=M_trans['Normal']+M_trans['Special_offer']+M_trans['returned_goods']\n",
    "M_trans.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "模型整合"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>8</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>84</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>28</td>\n",
       "      <td>0.062500</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>27</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>38</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "        R         F     M\n",
       "cumid                    \n",
       "10001   8  0.117647  19.0\n",
       "10002  84  0.000000  13.0\n",
       "10003  28  0.062500  17.0\n",
       "10004  27  0.117647  18.0\n",
       "10005  38  0.000000   9.0"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#拼接\n",
    "analysis=pd.concat([R_trans[\"R\"],F_trans[\"F\"],M_trans[\"M\"]],axis=1)\n",
    "analysis.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5. RFM分段打分"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5.1 F1 函数映射"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "R\n",
       "(-0.001, 12.0]    618\n",
       "(12.0, 113.0]     582\n",
       "Name: count, dtype: int64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#等频分箱\n",
    "pd.qcut(R_trans['R'],2,retbins=True)[0].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "12.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#获取分箱阈值\n",
    "pd.qcut(R_trans['R'],2,retbins=True)[1][1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "#定义分段函数\n",
    "def R_cut(x):\n",
    "    if x<=12:\n",
    "        return 1 \n",
    "    else:\n",
    "        return 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0</td>\n",
       "      <td>0.062500</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>0</td>\n",
       "      <td>0.117647</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>0</td>\n",
       "      <td>0.071429</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>0</td>\n",
       "      <td>0.227273</td>\n",
       "      <td>24.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>0</td>\n",
       "      <td>0.285714</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>1</td>\n",
       "      <td>0.058824</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>0</td>\n",
       "      <td>0.090909</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       R         F     M\n",
       "cumid                   \n",
       "10001  1  0.117647  19.0\n",
       "10002  0  0.000000  13.0\n",
       "10003  0  0.062500  17.0\n",
       "10004  0  0.117647  18.0\n",
       "10005  0  0.000000   9.0\n",
       "...   ..       ...   ...\n",
       "40296  0  0.071429  15.0\n",
       "40297  0  0.227273  24.0\n",
       "40298  0  0.285714  14.0\n",
       "40299  1  0.058824  17.0\n",
       "40300  0  0.090909  14.0\n",
       "\n",
       "[1200 rows x 3 columns]"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "analysis.R = analysis.R.apply(R_cut)\n",
    "analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>19.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>13.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>18.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>9.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>15.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>24.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>17.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>14.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       R  F     M\n",
       "cumid            \n",
       "10001  1  1  19.0\n",
       "10002  0  0  13.0\n",
       "10003  0  0  17.0\n",
       "10004  0  1  18.0\n",
       "10005  0  0   9.0\n",
       "...   .. ..   ...\n",
       "40296  0  0  15.0\n",
       "40297  0  1  24.0\n",
       "40298  0  1  14.0\n",
       "40299  1  0  17.0\n",
       "40300  0  1  14.0\n",
       "\n",
       "[1200 rows x 3 columns]"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# F 同理 \n",
    "pd.qcut(F_trans['F'],2,retbins=True)[1][1]\n",
    "\n",
    "# 定义F分段函数\n",
    "def F_cut(x):\n",
    "    if x<=pd.qcut(F_trans['F'],2,retbins=True)[1][1]:\n",
    "        return 0\n",
    "    else:\n",
    "        return 1\n",
    "\n",
    "analysis.F = analysis.F.apply(F_cut)\n",
    "analysis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 3 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       R  F  M\n",
       "cumid         \n",
       "10001  1  1  1\n",
       "10002  0  0  0\n",
       "10003  0  0  1\n",
       "10004  0  1  1\n",
       "10005  0  0  0\n",
       "...   .. .. ..\n",
       "40296  0  0  0\n",
       "40297  0  1  1\n",
       "40298  0  1  0\n",
       "40299  1  0  1\n",
       "40300  0  1  0\n",
       "\n",
       "[1200 rows x 3 columns]"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# M同理\n",
    "pd.qcut(M_trans['M'],2,retbins=True)[1][1]\n",
    "\n",
    "def M_cut(x):\n",
    "    if x<=pd.qcut(M_trans['M'],2,retbins=True)[1][1]:\n",
    "        return 0\n",
    "    else:\n",
    "        return 1\n",
    "\n",
    "analysis.M = analysis.M.apply(M_cut)\n",
    "analysis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "5.2 算法库"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 3.构建模型，筛选目标客户：进行分段打分\n",
    "# 导入sklearn中的预处理库：主要有编码、分箱、数值化转换……\n",
    "from sklearn import preprocessing"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(cumid\n",
       " 10001     (0.0833, 0.444]\n",
       " 10002    (-0.001, 0.0833]\n",
       " 10003    (-0.001, 0.0833]\n",
       " 10004     (0.0833, 0.444]\n",
       " 10005    (-0.001, 0.0833]\n",
       "                ...       \n",
       " 40296    (-0.001, 0.0833]\n",
       " 40297     (0.0833, 0.444]\n",
       " 40298     (0.0833, 0.444]\n",
       " 40299    (-0.001, 0.0833]\n",
       " 40300     (0.0833, 0.444]\n",
       " Name: F, Length: 1200, dtype: category\n",
       " Categories (2, interval[float64, right]): [(-0.001, 0.0833] < (0.0833, 0.444]],\n",
       " array([0.        , 0.08333333, 0.44444444]))"
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pd.qcut(F_trans['F'], 2, retbins=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style>#sk-container-id-1 {color: black;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>Binarizer(threshold=0.08333333333333333)</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">Binarizer</label><div class=\"sk-toggleable__content\"><pre>Binarizer(threshold=0.08333333333333333)</pre></div></div></div></div></div>"
      ],
      "text/plain": [
       "Binarizer(threshold=0.08333333333333333)"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "threshold = pd.qcut(F_trans['F'], 2, retbins=True)[1][1]\n",
    "preprocessing.Binarizer(threshold=threshold)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.11764706],\n",
       "       [0.        ],\n",
       "       [0.0625    ],\n",
       "       ...,\n",
       "       [0.28571429],\n",
       "       [0.05882353],\n",
       "       [0.09090909]])"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "F_trans['F'].values.reshape(-1, 1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "       R\n",
       "cumid   \n",
       "10001  0\n",
       "10002  1\n",
       "10003  1\n",
       "10004  1\n",
       "10005  1\n",
       "...   ..\n",
       "40296  1\n",
       "40297  1\n",
       "40298  1\n",
       "40299  0\n",
       "40300  1\n",
       "\n",
       "[1200 rows x 1 columns]"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#通过等频分箱 取出阈值\n",
    "threshold = pd.qcut(R_trans[\"R\"], q=2, retbins=True)[1][1]\n",
    "#用取出的阈值通过Binarizer进行二值化 01转换，相当于01编码\n",
    "binarizer = preprocessing.Binarizer(threshold=threshold) #实例化\n",
    "R_q = pd.DataFrame(binarizer.transform(R_trans[\"R\"].values.reshape(-1, 1)))\n",
    "R_q.index=R_trans.index\n",
    "R_q.columns=[\"R\"]\n",
    "R_q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>F</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         F\n",
       "cumid     \n",
       "10001  1.0\n",
       "10002  0.0\n",
       "10003  0.0\n",
       "10004  1.0\n",
       "10005  0.0\n",
       "...    ...\n",
       "40296  0.0\n",
       "40297  1.0\n",
       "40298  1.0\n",
       "40299  0.0\n",
       "40300  1.0\n",
       "\n",
       "[1200 rows x 1 columns]"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# F同理\n",
    "threshold = pd.qcut(F_trans['F'], 2, retbins=True)[1][1]\n",
    "binarizer = preprocessing.Binarizer(threshold=threshold)\n",
    "F_q = pd.DataFrame(binarizer.transform(F_trans['F'].values.reshape(-1, 1))) \n",
    "F_q.index=F_trans.index\n",
    "F_q.columns=[\"F\"]\n",
    "F_q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>M</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40296</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40297</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40298</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40299</th>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>40300</th>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>1200 rows × 1 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "         M\n",
       "cumid     \n",
       "10001  1.0\n",
       "10002  0.0\n",
       "10003  1.0\n",
       "10004  1.0\n",
       "10005  0.0\n",
       "...    ...\n",
       "40296  0.0\n",
       "40297  1.0\n",
       "40298  0.0\n",
       "40299  1.0\n",
       "40300  0.0\n",
       "\n",
       "[1200 rows x 1 columns]"
      ]
     },
     "execution_count": 44,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# M同理\n",
    "threshold = pd.qcut(M_trans['M'], 2, retbins=True)[1][1]\n",
    "binarizer = preprocessing.Binarizer(threshold=threshold)\n",
    "M_q = pd.DataFrame(binarizer.transform(M_trans['M'].values.reshape(-1, 1)))\n",
    "M_q.index=M_trans.index\n",
    "M_q.columns=[\"M\"]\n",
    "M_q"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>R</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         F    M  R\n",
       "cumid             \n",
       "10001  1.0  1.0  0\n",
       "10002  0.0  0.0  1\n",
       "10003  0.0  1.0  1\n",
       "10004  1.0  1.0  1\n",
       "10005  0.0  0.0  1"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "analysis1=pd.concat([F_q, M_q,R_q], axis=1)\n",
    "analysis1 = analysis1[['F','M','R']]\n",
    "analysis1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "6. RFM定性输出"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [],
   "source": [
    "label = {\n",
    "    (0,0,0):'无兴趣-低价值-沉默',\n",
    "    (1,0,0):'有兴趣-低价值-沉默',\n",
    "    (1,0,1):'有兴趣-低价值-活跃',\n",
    "    (0,0,1):'无兴趣-低价值-活跃',\n",
    "    (0,1,0):'无兴趣-高价值-沉默',\n",
    "    (1,1,0):'有兴趣-高价值-沉默',\n",
    "    (1,1,1):'有兴趣-高价值-活跃',\n",
    "    (0,1,1):'无兴趣-高价值-活跃'\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\87205\\AppData\\Local\\Temp\\ipykernel_36924\\2782579366.py:1: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  analysis['label'] = analysis[['F','M','R']].apply(lambda x: label[(x[0],x[1],x[2])], axis = 1)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>R</th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>有兴趣-高价值-活跃</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>无兴趣-低价值-沉默</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>无兴趣-高价值-沉默</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>有兴趣-高价值-沉默</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>无兴趣-低价值-沉默</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       R  F  M       label\n",
       "cumid                     \n",
       "10001  1  1  1  有兴趣-高价值-活跃\n",
       "10002  0  0  0  无兴趣-低价值-沉默\n",
       "10003  0  0  1  无兴趣-高价值-沉默\n",
       "10004  0  1  1  有兴趣-高价值-沉默\n",
       "10005  0  0  0  无兴趣-低价值-沉默"
      ]
     },
     "execution_count": 47,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "analysis['label'] = analysis[['F','M','R']].apply(lambda x: label[(x[0],x[1],x[2])], axis = 1) \n",
    "analysis.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\87205\\AppData\\Local\\Temp\\ipykernel_36924\\258869215.py:1: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`\n",
      "  analysis1['label'] = analysis1[['F','M','R']].apply(lambda x: label[(x[0],x[1],x[2])], axis = 1)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>F</th>\n",
       "      <th>M</th>\n",
       "      <th>R</th>\n",
       "      <th>label</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>cumid</th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "      <th></th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>10001</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0</td>\n",
       "      <td>有兴趣-高价值-沉默</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10002</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>无兴趣-低价值-活跃</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10003</th>\n",
       "      <td>0.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>无兴趣-高价值-活跃</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10004</th>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "      <td>有兴趣-高价值-活跃</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10005</th>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>1</td>\n",
       "      <td>无兴趣-低价值-活跃</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         F    M  R       label\n",
       "cumid                         \n",
       "10001  1.0  1.0  0  有兴趣-高价值-沉默\n",
       "10002  0.0  0.0  1  无兴趣-低价值-活跃\n",
       "10003  0.0  1.0  1  无兴趣-高价值-活跃\n",
       "10004  1.0  1.0  1  有兴趣-高价值-活跃\n",
       "10005  0.0  0.0  1  无兴趣-低价值-活跃"
      ]
     },
     "execution_count": 48,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "analysis1['label'] = analysis1[['F','M','R']].apply(lambda x: label[(x[0],x[1],x[2])], axis = 1) \n",
    "analysis1.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "310",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
